{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ENA from BioServices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The European Nucleotide Archive (ENA) provides a comprehensive \n",
    "record of the world's nucleotide sequencing information, covering \n",
    "raw sequencing data, sequence assembly information and functional \n",
    "annotation. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from bioservices import ENA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "e = ENA()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# You can retrieve read domain metadata in XML format:\n",
    "xml = e.get_data('ERA000092', 'xml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">ENA|A00145|A00145.1 B.taurus BoIFN-alpha A mRNA \n",
      "CTGAAGGAAGGTCTTCAGAGAACCTAGAGAGCAGGTTCACAGAGTCACCCACCTCACCAG\n",
      "GCCAAAGCATCTGCAAGGTCCCCGATGGCCCCAGCCTGGTCCTTCCTGCTATCCCTGTTG\n",
      "CTGCTCAGCTGCAACGCCATCTGCTCTCTGGGTTGCCACCTGCCTCACACCCACAGCCTG\n",
      "GCCAACAGGAGGGTCCTGATGCTCCTGCAACAACTGAGAAGGGTCTCCCCTTCCTCCTGC\n",
      "CTGCAGGACAGAAATGACTTCGAATTCCTCCAGGAGGCTCTGGGTGGCAGCCAGTTGCAG\n",
      "AAGGCTCAAGCCATCTCTGTGCTCCACGAGGTGACCCAGCACACCTTCCAGCTCTTCAGC\n",
      "ACAGAGGGCTCGCCCGCCACGTGGGACAAGAGCCTCCTGGACAAGCTACGCGCTGCGCTG\n",
      "GATCAGCAGCTCACTGACCTGCAAGCCTGTCTGACGCAGGAGGAGGGGCTGCGAGGGGCT\n",
      "CCCCTGCTCAAGGAGGACTCCAGCCTGGCTGTGAGGAAATACTTCCACAGACTCACTCTC\n",
      "TATCTGCAAGAGAAGAGACACAGCCCTTGTGCCTGGGAGGTTGTCAGAGCAGAAGTCATG\n",
      "AGAGCCTTCTCTTCCTCAACAAACTTGCAGGAGAGTTTCAGGAGAAAGGACTGACACACA\n",
      "CCTGGTCCAACACGGAAA\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Retrieve assembled and annotated sequences in FASTA format:\n",
    "fasta = e.get_data('A00145', 'fasta')\n",
    "print(fasta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ">ENA|A00145|A00145.1 B.taurus BoIFN-alpha A mRNA : Location:3..63\n",
      "GAAGGAAGGTCTTCAGAGAACCTAGAGAGCAGGTTCACAGAGTCACCCACCTCACCAGGC\n",
      "C\n"
     ]
    }
   ],
   "source": [
    "# Note that a subsequence can be retrieved using the fasta_range argument:\n",
    "subfasta = e.get_data('A00145', 'fasta', fasta_range=[3,63])\n",
    "print(subfasta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# View the assembled and annotated sequences on ENA's website:\n",
    "e.view_data('A00145')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
